#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
@Time     : 2024/10/23 6:38
@Author   : liujingmao
@File     : 1.Cohere重排序示例.py
"""

import os

import dotenv
import weaviate
from langchain.retrievers import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_openai import OpenAIEmbeddings
from langchain_weaviate import WeaviateVectorStore
from weaviate.auth import AuthApiKey

# Load variables from a local .env file into the process environment. The
# OpenAI and Cohere components below are not handed keys explicitly, so they
# depend on whatever the environment provides.
dotenv.load_dotenv()

# Weaviate connection settings are read from the environment so that no secret
# is committed with the source. The cluster URL is not a credential, so the
# previously used value is kept as the fallback.
# NOTE(review): the API key that used to be hard-coded here should be treated
# as exposed and rotated.
WEAVIATE_URL = os.getenv(
    "WEAVIATE_URL",
    "https://1blxxsp7tq6lenouzruprg.c0.europe-west3.gcp.weaviate.cloud",
)
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
if not WEAVIATE_API_KEY:
    raise RuntimeError(
        "WEAVIATE_API_KEY is not set; add it to the environment or the .env file."
    )

# 1. Create the embedding model and the rerank component before opening any
#    connection, so a configuration error here cannot leave a client open.
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
rerank = CohereRerank(model="rerank-multilingual-v3.0")

# NOTE(review): newer weaviate-client releases expose this helper as
# connect_to_weaviate_cloud — confirm against the installed version.
client = weaviate.connect_to_wcs(
    cluster_url=WEAVIATE_URL,
    auth_credentials=AuthApiKey(WEAVIATE_API_KEY),
)
try:
    # Vector store backed by the "DatasetDemo" collection.
    db = WeaviateVectorStore(
        client=client,
        index_name="DatasetDemo",
        text_key="text",
        embedding=embedding,
    )

    # 2. Build the compression retriever: MMR search on the vector store,
    #    with the Cohere reranker as the compressor.
    retriever = ContextualCompressionRetriever(
        base_retriever=db.as_retriever(search_type="mmr"),
        base_compressor=rerank,
    )

    # 3. Run the search and print the reranked documents and their count.
    search_docs = retriever.invoke("关于LLMOps应用配置的信息有哪些呢？")
    print(search_docs)
    print(len(search_docs))
finally:
    # Always release the Weaviate connection, even if the query fails.
    client.close()

"""
ERROR: pip's dependency resolver does not currently take into account all the packages that 
are installed. This behaviour is the source of the following dependency conflicts.
datasets 2.21.0 requires dill<0.3.9,>=0.3.0, but you have dill 0.3.9 which is incompatible.
grpcio-health-checking 1.66.1 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 4.25.5 which is incompatible.
grpcio-tools 1.66.1 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 4.25.5 which is incompatible.
langchain-pinecone 0.1.3 requires langchain-core<0.3,>=0.1.52, but you have langchain-core 0.3.12 which is incompatible.
tcvectordb 1.3.13 requires urllib3<=1.26.19, but you have urllib3 2.2.3 which is incompatible.

"""
